# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 06:29:09 2020

@author: HP
"""


from os.path import isfile
import numpy as np
import pandas as pd
from scipy.optimize import linprog
from increasing_closures import impatience2, patience2
from revealed_python.garp_NOQ import garp_NOQ_find_e



#=============================================================================
#DU Test
#=============================================================================

def PQR_time_test_find_e( C, P, return_deltas = False, deltas = np.linspace(0,1.,21) ):
    """
    Search for the largest efficiency level e at which PQR_time_test passes.

    For every candidate delta, e is bisected on [0, 1] (starting from e = 1.0)
    until the bracket is narrower than 2**(-15). The value kept for a delta is
    the last midpoint computed, or 1.0 when the very first test passes. The
    best value over all deltas is reported.

    Parameters
    ----------
    C, P : passed through unchanged to PQR_time_test.
    return_deltas : when equal to False only the best e is returned; otherwise
        a dict that also holds the range of deltas achieving it.
    deltas : iterable of candidate delta values (default: 21 evenly spaced
        points from 0 to 1).

    Returns
    -------
    float, or dict with keys 'e', 'delta_low' and 'delta_high'.
    """
    tolerance = 2**(-15)
    #Kept as an equality test so that non-bool flags behave exactly as before.
    scalar_only = (return_deltas == False)

    best_e = 0.0
    delta_low, delta_high = 1.0, 0.0

    for delta in deltas:

        #Nothing can beat e = 1, so stop early unless the delta range is wanted.
        if scalar_only and best_e == 1.0:
            break

        lower, upper = 0.0, 1.0
        e = 1.0
        #Bisect until the bracket [lower, upper] is narrow enough.
        while upper - lower >= tolerance:
            if PQR_time_test( C, P, delta, e ):
                lower = e #e passes, so raise the lower bound.
            else:
                #e fails; if it is already below the best value found so far
                #this delta cannot win, so give up on it (e stays below best_e).
                if e < best_e:
                    break
                upper = e #e fails, so lower the upper bound.
            e = (upper + lower) / 2.0 #Next candidate is the midpoint.

        if e > best_e:
            #Strictly better: this delta alone defines the range.
            best_e = e
            delta_low = delta_high = delta
        elif e == best_e:
            #Tie: widen the range of deltas that achieve the best e.
            delta_low = min( delta_low, delta )
            delta_high = max( delta_high, delta )

    if scalar_only:
        return best_e
    return { 'e': best_e, 'delta_low': delta_low, 'delta_high': delta_high }

def PQR_time_test(C,P, delta, e = 1.0):
    """
    Check whether the choices fit discounted utility for a given delta and e.

    One utility value u(.) is assigned to every distinct payout found in C.
    For each observation n, every pair (pay1, pay2) of observed payout values
    whose cost at prices P[n] is strictly below e times the cost of the chosen
    pair C[n] must satisfy

        u(pay1) + delta*u(pay2) - u(C[n,0]) - delta*u(C[n,1]) <= -1.

    The test passes when this system of linear inequalities is feasible.

    Parameters
    ----------
    C : 2-D array, one row per observation; columns 0 and 1 are the two chosen
        payouts.
    P : 2-D array of the matching prices, same layout as C.
    delta : weight applied to the utility of the second-column payout.
    e : efficiency level scaling each observation's expenditure (default 1.0).

    Returns
    -------
    bool
        True when the linear program is solved successfully (linprog status 0)
        or when there are no constraints to satisfy, which includes the case
        of no observations at all. False otherwise.
    """
    N = C.shape[0]

    #Sorted list of the distinct payout values from either column, as plain floats.
    payouts = np.unique( np.concatenate( (C[:,0], C[:,1]),axis=0), axis = 0 )
    payouts = [ float(payout) for payout in payouts ]

    #Payout value -> index of its utility variable.
    payout_dict = { payout: i for i, payout in enumerate(payouts) }

    #For each observation list the (index, index) payout pairs that are strictly
    #cheaper than e times the chosen pair. The scaled expenditure does not depend
    #on the candidate pair, so it is computed once per observation.
    RP_list = []
    for n in range(N):
        prices = P[n,:]
        scaled_expenditure = e * (prices @ C[n,:])
        RP_list.append( [ ( payout_dict[pay1], payout_dict[pay2] )
                          for pay1 in payouts for pay2 in payouts
                          if scaled_expenditure > prices @ np.array([ pay1, pay2]) ] )

    #One LP variable per payout, one constraint per listed pair.
    varsN = len(payouts)
    constraintsN = sum( len(RPs) for RPs in RP_list )

    #With nothing to satisfy the system is trivially feasible. Returning here also
    #avoids handing linprog an empty problem when there are no observations.
    if constraintsN == 0:
        return True

    #Constraint matrix
    A = np.zeros( (constraintsN, varsN) )

    constrainti = 0
    for ob, RPs in enumerate(RP_list):
        #Utility-variable indices of the pair chosen in this observation.
        chosen_first = payout_dict[ float( C[ob,0] ) ]
        chosen_second = payout_dict[ float( C[ob,1] ) ]

        for rp_first, rp_second in RPs:
            #Accumulate rather than assign: the same payout can appear in more
            #than one of the four positions.
            A[constrainti, chosen_first] -= 1.0
            A[constrainti, chosen_second] -= delta
            A[constrainti, rp_first] += 1.0
            A[constrainti, rp_second] += delta

            constrainti += 1

    #Pure feasibility problem: zero objective, every constraint has right-hand side -1.
    #linprog's default variable bounds are left in place.
    obj = np.zeros( varsN )
    constraint_vector = -np.ones( constraintsN )

    sol = linprog( c = obj, A_ub = A, b_ub = constraint_vector, method = 'highs-ds')

    return bool( sol.status == 0 )


#=============================================================================
#INPUT FILE
#=============================================================================
#Load the raw choice data and list every participant once, in sorted order.
df = pd.read_csv( 'ITCR_cdata.csv' )
ids = sorted( set( df['participantid'] ) )

#=============================================================================
#MODES AND COLUMNS
#=============================================================================
#Each test name maps to the Ilist argument given to garp_NOQ_find_e further down.
#'du' carries no list because it is scored with PQR_time_test_find_e instead.
tests_dict = {
    'def': [],
    'imp': [impatience2],
    'pat': [patience2],
    'du': None,
}

#Three result columns per test, grouped by test in tests_dict order.
column_suffixes = ( ' notime', ' notime resample', ' time' )
columns = [ test_name + suffix for test_name in tests_dict for suffix in column_suffixes ]

#-------------------
#manage results file
#-------------------
results_file = 'time_analysis_by_participant.xlsx'
if isfile( results_file ):
    #Reuse the results saved by an earlier run; the first sheet column is the index.
    results_df = pd.read_excel( results_file, sheet_name = 0, index_col = 0 )
else:
    #Start from an all-NaN table with one row per participant.
    results_df = pd.DataFrame( np.nan,
                               index = pd.Index( ids, name = 'participantid' ),
                               columns = columns )

#=============================================================================
#DATA SPLITTING
#=============================================================================

#66666 in a price column acts as a marker value: rows carrying it in both 'pz'
#and 'pw' form df_time, rows carrying it in neither form df_notime.
pz_marked = df['pz'] == 66666.
pw_marked = df['pw'] == 66666.

df_notime = df[ ~pz_marked & ~pw_marked ].copy()
df_time = df[ pz_marked & pw_marked ].copy()

#Time rows keep only the (px, py, x, y) half, renamed to the common p1/p2/q1/q2 layout.
df_time = (
    df_time
    .drop( columns = ['pz','pw', 'z', 'w'] )
    .rename( columns = { 'px': 'p1', 'py': 'p2', 'x': 'q1', 'y': 'q2' } )
    .reset_index( drop = True )
)

#Each notime row holds two price/quantity pairs; split them into two frames with
#the common layout and stack them, so every original row contributes two rows.
df1 = df_notime[ ['participantid','px','py','x','y'] ].rename(
    columns = { 'px': 'p1', 'py': 'p2', 'x': 'q1', 'y': 'q2' } )
df2 = df_notime[ ['participantid','pz','pw','z','w'] ].rename(
    columns = { 'pz': 'p1', 'pw': 'p2', 'z': 'q1', 'w': 'q2' } )

df_notime = (
    pd.concat( [df1,df2], axis=0, ignore_index = True )
    .sort_values( by='participantid' )
    .reset_index( drop = True )
)

samplesN = 100


def _efficiency_score( obs_df, test_name ):
    """Return the efficiency level of the rows in obs_df under the named test.

    Quantities are read from columns 'q1', 'q2' and prices from 'p1', 'p2'.
    'du' is scored with PQR_time_test_find_e; every other name is scored with
    garp_NOQ_find_e using the matching entry of tests_dict as Ilist.
    """
    C = obs_df[['q1', 'q2']].to_numpy()
    P = obs_df[['p1', 'p2']].to_numpy()
    if test_name == 'du':
        return PQR_time_test_find_e(C,P)
    return garp_NOQ_find_e(C,P,Ilist= tests_dict[test_name])


for pid in ids:

    #Only participants with at least one missing result are (re)computed.
    if not np.isnan( results_df.loc[pid,:] ).any():
        continue

    print('working on {0}'.format(pid))
    cur_df_notime = df_notime[ df_notime['participantid'] == pid ]
    cur_df_time = df_time[ df_time['participantid'] == pid]

    for test_name in tests_dict:
        #Score on the time rows, then on all notime rows.
        results_df.loc[pid,test_name + ' time'] = _efficiency_score( cur_df_time, test_name )
        results_df.loc[pid,test_name + ' notime'] = _efficiency_score( cur_df_notime, test_name )

        #Average score over samplesN random subsets of 11 notime rows.
        #NOTE(review): the draws are unseeded, so this column differs between runs,
        #and sample(11) raises if a participant has fewer than 11 notime rows.
        samples_array = np.zeros( samplesN )
        for samplei in range(samplesN):
            samples_array[samplei] = _efficiency_score( cur_df_notime.sample(11), test_name )
        results_df.loc[pid,test_name + ' notime resample'] = samples_array.mean()

    #Save after every participant so an interrupted run can resume from the file.
    results_df.to_excel(results_file)

#=============================================================================
#CATEGORIZE PARTICIPANTS
#=============================================================================
        
#Keep only the rows that carry the 66666 marker in neither 'pz' nor 'pw'.
df = df[ ~( (df['pz'] == 66666.) | (df['pw'] == 66666.) ) ].copy()

#Per row, how many of the four quantities are exactly zero.
zeros_count = df[['x','y','z','w']].eq(0.).sum(axis=1)

#Flag rows with at least one zero quantity ('edges') and at least three ('corners').
df['edges'] = (zeros_count >= 1).astype('int64')
df['corners'] = (zeros_count >= 3).astype('int64')

#Number of flagged rows per participant.
char_df = df.groupby('participantid')[['edges','corners']].sum()

#Append the two counts as extra columns of the results table.
results_df = pd.concat( [results_df, char_df], axis=1 )

#=============================================================================
#SUMMARIZE RESULTS
#=============================================================================
summary_file = 'time_analysis_summary.xlsx'

#columns and row names
cats = ['all', 'not edge', 'edge']
cat_varsin = ['edges', 'corners']
escores = [100,99,95,90]

#One summary row per (category, escore) pair, one column per result column.
my_index = pd.MultiIndex.from_product( [cats, escores], names = ['category','escore'] )
summary_df = pd.DataFrame(data=np.nan, index = my_index, columns = columns)

for cat in cats:

    #Pick the participants belonging to this category.
    #NOTE(review): 41 is presumably the number of notime rows per participant, so
    #'edge' would mean every such row has a zero quantity - confirm against the data.
    if cat == 'all':
        subset = results_df
    elif cat == 'not edge':
        subset = results_df[ results_df['edges'] < 41 ]
    elif cat == 'edge':
        subset = results_df[ results_df['edges'] == 41 ]
    else:
        raise Exception('invalid cat {0}'.format(cat))

    #The category counts are not scores, so drop them before thresholding.
    scores = subset.drop( columns = cat_varsin )

    for escore in escores:
        cutoff = escore / 100.
        #1 where the score reaches the cutoff, 0 where it does not; missing scores
        #stay missing so the column mean ignores them.
        reached = (scores >= cutoff).astype(float).where( scores.notna() )
        #Share of participants in the category reaching the cutoff, per column.
        summary_df.loc[ (cat,escore) ] = reached.mean()

summary_df.to_excel(summary_file)

